import numpy as np
def compute_depth(order_book_side):
    """Return the total quantity resting on one side of the book.

    ``order_book_side`` is an iterable of ``(price, quantity)`` pairs. Prices
    are ignored and the quantities are added up, so an empty side has a
    depth of 0.
    """
    quantities = (quantity for _price, quantity in order_book_side)
    return sum(quantities)
def compute_slope(order_book_side):
    """Return the slope of the bid or ask curve at the top of the book.

    The slope is the price gap between the first and second level divided by
    the quantity at the first level. ``order_book_side`` is a sequence of
    ``(price, quantity)`` pairs.

    Returns None when fewer than two levels are present or when the quantity
    at the first level is not positive.
    """
    if len(order_book_side) < 2:
        # A single level (or none) has no gap to measure.
        return None

    top_price, top_quantity = order_book_side[0]
    next_price, _ = order_book_side[1]

    if top_quantity > 0:
        return (top_price - next_price) / top_quantity
    return None
def compute_quantity_weighted_price(order_book_side):
    """Return the average price of one side, weighted by quantity.

    ``order_book_side`` is a collection of ``(price, quantity)`` pairs.
    Returns None when the quantities sum to zero (including an empty side),
    because the weighted average is undefined in that case.
    """
    quantity_sum = sum(quantity for _price, quantity in order_book_side)
    if quantity_sum == 0:
        return None

    notional = sum(price * quantity for price, quantity in order_book_side)
    return notional / quantity_sum
def compute_quantity_weighted_mid_quote(bid_side, ask_side):
    """Return the midpoint of the quantity-weighted bid and ask prices.

    Returns None when either side has no quantity-weighted price.
    """
    weighted_bid = compute_quantity_weighted_price(bid_side)
    weighted_ask = compute_quantity_weighted_price(ask_side)

    if weighted_bid is not None and weighted_ask is not None:
        return (weighted_bid + weighted_ask) / 2
    return None
def compute_quantity_weighted_bid_ask_spread(bid_side, ask_side):
    """Return the quantity-weighted ask price minus the quantity-weighted bid price.

    Returns None when either side has no quantity-weighted price.
    """
    weighted_bid = compute_quantity_weighted_price(bid_side)
    weighted_ask = compute_quantity_weighted_price(ask_side)

    both_sides_priced = weighted_bid is not None and weighted_ask is not None
    return weighted_ask - weighted_bid if both_sides_priced else None
def compute_mid_quote_difference(mid_quote, bid_side, ask_side):
    """Return ``mid_quote`` minus the quantity-weighted mid-quote of the book.

    Returns None when the quantity-weighted mid-quote cannot be computed.
    """
    weighted_mid = compute_quantity_weighted_mid_quote(bid_side, ask_side)
    return None if weighted_mid is None else mid_quote - weighted_mid
def get_order_book(timestamp, df):
    """Return the bid and ask levels recorded at ``timestamp``.

    ``df`` must provide the columns 'Timestamp', 'Side', 'Price' and 'Size'.
    Both results are lists of ``(price, size)`` tuples: bids ordered from the
    highest price down, asks from the lowest price up.
    """
    snapshot = df[df['Timestamp'] == timestamp]

    def _levels(side, ascending):
        # Select one side of the snapshot and order its levels by price.
        rows = snapshot[snapshot['Side'] == side]
        ordered = rows[['Price', 'Size']].sort_values(by='Price', ascending=ascending)
        return list(ordered.itertuples(index=False, name=None))

    bid_side = _levels('bid', ascending=False)
    ask_side = _levels('ask', ascending=True)
    return bid_side, ask_side
import pandas as pd
def compute_orderbook_changes(orderbook_df):
    """
    Compute the per-level size changes between consecutive order book snapshots.

    Parameters:
    - orderbook_df: DataFrame with ['Price', 'Size', 'Side', 'Timestamp'].
      It is left untouched: the timestamp conversion is done on a copy, so
      passing a filtered slice of a larger frame is safe.

    Returns:
    - DataFrame with columns ['Timestamp', 'Price', 'Side', 'Prev_Size',
      'New_Size', 'Size_Change']. There is one row for every (price, side)
      level whose size differs between a snapshot and the snapshot before it;
      'Timestamp' is the later of the two. A level missing from one of the two
      snapshots counts as size 0 there. The earliest snapshot has no
      predecessor and therefore contributes no rows. Rows whose timestamp is
      missing (NaT) are ignored.
    """
    # assign() returns a new frame, so the caller's 'Timestamp' column keeps
    # its original dtype and no chained-assignment warning is triggered.
    book = orderbook_df.assign(Timestamp=pd.to_datetime(orderbook_df['Timestamp']))
    book = book.sort_values(by=["Timestamp", "Price"]).drop_duplicates()

    # Build every snapshot exactly once, in chronological order, as a Series
    # of sizes indexed by (Price, Side).
    snapshots = [
        (timestamp, rows.set_index(['Price', 'Side'])['Size'])
        for timestamp, rows in book.groupby('Timestamp', sort=True)
    ]

    changes = []
    for (_, prev_sizes), (timestamp, new_sizes) in zip(snapshots, snapshots[1:]):
        # fill_value=0 makes a level that appears or disappears show up as a
        # change from/to zero instead of NaN.
        size_changes = new_sizes.subtract(prev_sizes, fill_value=0)

        # Store only nonzero changes along with the sizes on both sides
        for (price, side), change in size_changes.items():
            if change != 0:
                prev_size = prev_sizes.get((price, side), 0)
                new_size = new_sizes.get((price, side), 0)
                changes.append([timestamp, price, side, prev_size, new_size, change])

    return pd.DataFrame(
        changes,
        columns=['Timestamp', 'Price', 'Side', 'Prev_Size', 'New_Size', 'Size_Change'],
    )
The total depth on one side of the order book is the sum of all available quantities:
$ \text{Depth} = \sum_{i} q_i $
where $q_i$ is the quantity available at each price level $i$.
The slope of the bid or ask curve measures how quickly the price changes with respect to quantity:
$ \text{Slope}_{\text{bid}} = \frac{p_{\text{bid},1} - p_{\text{bid},2}}{q_{\text{bid},1}} $
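where $p_{\text{bid},1}$ and $p_{\text{bid},2}$ are the best and second-best bid prices, and $q_{\text{bid},1}$ is the quantity available at the best bid.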
The ask slope is defined analogously.
The quantity-weighted price for bids or asks is:
$ \text{WP}_{\text{side}} = \frac{\sum_{i} p_{\text{side},i} \cdot q_{\text{side},i}}{\sum_{i} q_{\text{side},i}} $
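where $p_{\text{side},i}$ and $q_{\text{side},i}$ are the price and the quantity at level $i$ on the given side (bid or ask).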
The quantity-weighted mid-quote is the average of the quantity-weighted bid and ask prices:
$ \text{WMid} = \frac{\text{WP}_{\text{bid}} + \text{WP}_{\text{ask}}}{2} $
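where $\text{WP}_{\text{bid}}$ and $\text{WP}_{\text{ask}}$ are the quantity-weighted bid and ask prices defined above.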
The quantity-weighted bid-ask spread is given by:
$ \text{WSpread} = \text{WP}_{\text{ask}} - \text{WP}_{\text{bid}} $
which represents the difference between the quantity-weighted ask and bid prices.
The difference between the regular mid-quote and the quantity-weighted mid-quote is:
$ \text{Mid-Quote Difference} = \text{MidQuote} - \text{WMid} $
where $\text{MidQuote}$ is the traditional mid-point between the best bid and ask prices.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import json
import warnings
warnings.simplefilter("ignore")
# Load the matchup metadata.
# NOTE(review): keys are presumably "<team1> vs. <team2>" strings -- the lookup
# further down rebuilds them with " vs. ".join(...); confirm against the file.
with open("matchup_details.json", "r", encoding="utf-8") as file:
    matchups = json.load(file)  # Parse JSON into a Python dictionary

# The first and third space-separated tokens of each key are taken as the teams.
matchups_list = [
    (tokens[0], tokens[2])
    for tokens in (matchup_key.split(' ') for matchup_key in matchups)
]

# Keep only the most recent matchup of every team: a new matchup evicts any
# earlier one that shares a team with it. The list is rebuilt on each step so
# there is no positional bookkeeping that could be left pointing at an
# already-removed matchup.
filtered_matchups = []
for team1, team2 in matchups_list:
    filtered_matchups = [
        pairing for pairing in filtered_matchups
        if team1 not in pairing and team2 not in pairing
    ]
    filtered_matchups.append((team1, team2))

# Position of each team's surviving matchup inside filtered_matchups.
team_indices = {
    team: position
    for position, pairing in enumerate(filtered_matchups)
    for team in pairing
}

# Outcomes of every surviving matchup whose rebuilt key exists in the JSON.
id_filtered = [
    matchups[" vs. ".join(pairing)]['outcomes']
    for pairing in filtered_matchups
    if " vs. ".join(pairing) in matchups
]

# NOTE(review): the first two filtered matchups are skipped; the reason is not
# visible in this file -- confirm the offset is still wanted.
gamelist = [list(outcomes.keys()) for outcomes in id_filtered][2:]

for keys in gamelist:
    # Order book changes per team, bid and ask computed separately.
    team_data_changes = {}
    for team in keys:
        df = pd.read_parquet(team + '.parquet')
        bid_df = df[df.Side == 'bid']
        ask_df = df[df.Side == 'ask']
        bid_changes = compute_orderbook_changes(bid_df)
        ask_changes = compute_orderbook_changes(ask_df)
        team_data_changes[team] = pd.concat([bid_changes, ask_changes])

    fig, axes = plt.subplots(1, 2, figsize=(30, 8), sharex=True, sharey=True, facecolor=(1, 1, 1))

    for col, team in enumerate(keys[:2]):  # Only plot top two
        ax = axes[col]
        changes = team_data_changes[team]
        bid_data_changes = changes[changes.Side == 'bid']
        ask_data_changes = changes[changes.Side == 'ask']

        # Squares mark bid changes, circles mark ask changes; colour encodes
        # the signed size change.
        ax.scatter(
            bid_data_changes["Timestamp"], bid_data_changes["Price"],
            c=bid_data_changes["Size_Change"], cmap="coolwarm", s=200,
            edgecolors="k", marker='s', label="Bid Changes",
        )
        ax.scatter(
            ask_data_changes["Timestamp"], ask_data_changes["Price"],
            c=ask_data_changes["Size_Change"], cmap="coolwarm", s=200,
            edgecolors="k", marker='o', label="Ask Changes",
        )

        timestamps = sorted(changes['Timestamp'].unique())

        # Read and de-duplicate the raw book once per team rather than once
        # per timestamp; the file content does not depend on the timestamp.
        # NOTE(review): assumes the parquet 'Timestamp' column is already
        # datetime-typed so it compares equal to the converted timestamps
        # produced by compute_orderbook_changes -- confirm.
        full_book = pd.read_parquet(f'{team}.parquet').drop_duplicates()

        # Depths are collected alongside the plotted series but not drawn.
        bid_depths, ask_depths, mid_quotes, bid_ask_spreads = [], [], [], []
        for timestamp in timestamps:
            bid_side, ask_side = get_order_book(timestamp, full_book)
            bid_depths.append(compute_depth(bid_side))
            ask_depths.append(compute_depth(ask_side))
            mid_quotes.append(compute_quantity_weighted_mid_quote(bid_side, ask_side))
            bid_ask_spreads.append(compute_quantity_weighted_bid_ask_spread(bid_side, ask_side))

        # unique() can yield numpy.datetime64 values, which have no
        # .timestamp(); pd.Timestamp() accepts both those and pandas Timestamps.
        timestamps_num = [pd.Timestamp(t).timestamp() for t in timestamps]

        ax.plot(timestamps, mid_quotes, label="Quantity-Weighted Mid-Quote", marker='o', color='black', markersize=10)
        ax.plot(timestamps, bid_ask_spreads, label="Quantity-Weighted Bid-Ask Spread", marker='s', color='red', markersize=12)
        ax.set_xlabel("Timestamp", fontsize=25)
        ax.set_ylabel("Price Level", fontsize=25)
        ax.set_title(f"{team} - Bid & Ask Changes", fontsize=35)
        ax.tick_params(axis="x", rotation=45)
        ax.grid(True)

    # One shared legend for the figure, taken from the first panel.
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='lower center', ncol=10, fontsize=20)
    # `team` is the last team plotted above.
    plt.suptitle(f'Order Book Visualization for {team}', fontsize=40)
    # Leave the bottom 10% of the figure free for the legend.
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    plt.show()